//+------------------------------------------------------------------+
//|                   Feature engineering Timeseries forecasting.mq5 |
//|                                     Copyright 2023, Omega Joctan |
//|                        https://www.mql5.com/en/users/omegajoctan |
//+------------------------------------------------------------------+
#property copyright "Copyright 2023, Omega Joctan"
#property link      "https://www.mql5.com/en/users/omegajoctan"
#property version   "1.00"

//--- Script inputs
//--- bars      : number of bars requested for every series copied in OnStart
//--- timeframe : chart period the rates and indicator values are read from
//--- start_bar : index of the first copied bar; OnStart refuses values below 1
//---             because the target columns are read from start_bar-1
input int bars = 1000;
input ENUM_TIMEFRAMES timeframe = PERIOD_D1;
input uint start_bar = 2; //StartBar|Must be >= 1

//--- Holds the open, high, low and close series of one bar range, both as
//--- individual vectors and as the four columns of a single matrix.
struct ohlc_struct 
{
   vector open;
   vector high;
   vector low;
   vector close;
   
   matrix MATRIX; //this stores all the vectors all-together | columns: 0=open, 1=high, 2=low, 3=close
   
   //--- Copies `size` bars of `symbol`/`tf` prices starting at bar index `start`
   //--- into the four vectors and rebuilds MATRIX from them.
   //--- When any copy fails, or the copied series differ in length, the error is
   //--- printed, the vectors are emptied and MATRIX is left with zero rows so
   //--- callers can detect the failure through MATRIX.Rows().
   void AddCopyRates(string symbol, ENUM_TIMEFRAMES tf, ulong start, ulong size)
    {
      bool copied = open.CopyRates(symbol, tf, COPY_RATES_OPEN, start, size) &&
                    high.CopyRates(symbol, tf, COPY_RATES_HIGH, start, size) &&
                    low.CopyRates(symbol, tf, COPY_RATES_LOW, start, size) &&
                    close.CopyRates(symbol, tf, COPY_RATES_CLOSE, start, size);
      
      int err = GetLastError(); //read right after the copies, before any other call can change it
      
      //the columns of MATRIX must all have the same length, so verify it instead of assuming it
      bool same_size = high.Size() == open.Size() &&
                       low.Size() == open.Size() &&
                       close.Size() == open.Size();
      
      if (!copied || !same_size)
        {
          Print(__FUNCTION__, " failed to copy OHLC rates for ", symbol, " starting at bar ", start, ", Error ", err);
          
          open.Resize(0);
          high.Resize(0);
          low.Resize(0);
          close.Resize(0);
          this.MATRIX.Resize(0, 4);
          return;
        }
      
      this.MATRIX.Resize(open.Size(), 4); //one row per copied bar
      
      this.MATRIX.Col(open, 0);
      this.MATRIX.Col(high, 1);
      this.MATRIX.Col(low, 2);
      this.MATRIX.Col(close, 3);
    }
};

matrix DATASET = {}; //All data will be saved in this matrix
string csv_header; //comma-separated column names for DATASET; built up in OnStart and handed to WriteCsv
//+------------------------------------------------------------------+
//| Script program start function                                    |
//+------------------------------------------------------------------+
void OnStart()
  {
//--- Builds a feature matrix (prices, lagged prices, rolling statistics,
//--- calendar features, first differences and next-bar targets) for the
//--- current symbol and saves it as "Timeseries OHLC.csv".
//--- Every step is verified; on the first failure the script stops with a
//--- message instead of writing a misaligned dataset.

   if (start_bar<1)
    {
       Print("The starting bar must be >= 1");
       return;
    }

   if (bars<1) //a negative value would turn into a huge unsigned bar count
    {
       Print("The number of bars must be >= 1");
       return;
    }

//--- getting Open, high, low and close prices
   
   ohlc_struct OHLC;
   vector time_vector; //bar times, used for the calendar features below
   
   OHLC.AddCopyRates(Symbol(), timeframe, start_bar, bars);
   
   if (!time_vector.CopyRates(Symbol(), timeframe, COPY_RATES_TIME, start_bar, bars)) //copy the time in seconds
    {
       printf("Failed to copy the bar times, Error %d", GetLastError());
       return;
    }
   
   ulong rows = OHLC.MATRIX.Rows(); //every feature column must end up with exactly this many values
   
   if (rows==0 || time_vector.Size()!=rows)
    {
       Print("Failed to copy the OHLC prices and bar times for ", Symbol());
       return;
    }
   
//--- Getting the lagged values of Open, High, low and close prices

   ohlc_struct  lag_1;
   lag_1.AddCopyRates(Symbol(), timeframe, start_bar+1, bars);
   
   ohlc_struct  lag_2;
   lag_2.AddCopyRates(Symbol(), timeframe, start_bar+2, bars);
   
   ohlc_struct  lag_3;
   lag_3.AddCopyRates(Symbol(), timeframe, start_bar+3, bars);

   //a shorter lagged series would make concatenate() discard the whole dataset
   if (lag_1.MATRIX.Rows()!=rows || lag_2.MATRIX.Rows()!=rows || lag_3.MATRIX.Rows()!=rows)
    {
       Print("The lagged series do not have ", rows, " rows each, try fewer bars");
       return;
    }

//--- Adding the lagged features to the dataset matrix 

   DATASET = concatenate(DATASET, OHLC.MATRIX);
   DATASET = concatenate(DATASET, lag_1.MATRIX);
   DATASET = concatenate(DATASET, lag_2.MATRIX);
   DATASET = concatenate(DATASET, lag_3.MATRIX);
   
   csv_header="OPEN,HIGH,LOW,CLOSE"+",OPEN_LAG1,HIGH_LAG1,LOW_LAG1,CLOSE_LAG1"+",OPEN_LAG2,HIGH_LAG2,LOW_LAG2,CLOSE_LAG2"+",OPEN_LAG3,HIGH_LAG3,LOW_LAG3,CLOSE_LAG3";
   
//---  Rolling Statistics 

   int ma_handle = iMA(Symbol(),timeframe,30,0,MODE_SMA,PRICE_WEIGHTED); //The moving average over 30 bars
   int stddev = iStdDev(Symbol(), timeframe, 7,0,MODE_SMA,PRICE_WEIGHTED); //The standard deviation over 7 bars
   
   if (ma_handle==INVALID_HANDLE || stddev==INVALID_HANDLE)
    {
       printf("Failed to create the indicator handles, Error %d", GetLastError());
       
       if (ma_handle!=INVALID_HANDLE)
          IndicatorRelease(ma_handle);
       if (stddev!=INVALID_HANDLE)
          IndicatorRelease(stddev);
       return;
    }
   
   //indicator values may not be available right after the handle is created,
   //so wait (at most ~5 seconds) until both indicators report calculated bars
   for (int attempt=0; attempt<50 && !IsStopped(); attempt++)
    {
       if (BarsCalculated(ma_handle)>0 && BarsCalculated(stddev)>0)
          break;
       Sleep(100);
    }
   
   vector SMA_BUFF, STDDEV_BUFF;
   bool sma_copied = SMA_BUFF.CopyIndicatorBuffer(ma_handle,0,start_bar, bars);
   bool stddev_copied = STDDEV_BUFF.CopyIndicatorBuffer(stddev, 0, start_bar, bars);
   int indicator_err = GetLastError();
   
   //the handles are not needed once the buffers are copied
   IndicatorRelease(ma_handle);
   IndicatorRelease(stddev);
   
   if (!sma_copied || !stddev_copied || SMA_BUFF.Size()!=rows || STDDEV_BUFF.Size()!=rows)
    {
       printf("Failed to copy the indicator buffers, Error %d", indicator_err);
       return;
    }
   
   DATASET = concatenate(DATASET, SMA_BUFF);
   DATASET = concatenate(DATASET, STDDEV_BUFF);
   
   csv_header += ",30DAY_SMA,7DAY_STDDEV";
   
//--- Datetime Features
   
   ulong size = time_vector.Size(); 
   vector DAY(size), DAYOFWEEK(size), DAYOFYEAR(size), MONTH(size);
   
   MqlDateTime time_struct;
   for (ulong i=0; i<size; i++)
     {
       TimeToStruct((datetime)time_vector[i], time_struct); //seconds -> calendar fields
       
       DAY[i] = time_struct.day;
       DAYOFWEEK[i] = time_struct.day_of_week;
       DAYOFYEAR[i] = time_struct.day_of_year;
       MONTH[i] = time_struct.mon;
     }
   
   DATASET = concatenate(DATASET, DAY); //day of the month
   DATASET = concatenate(DATASET, DAYOFWEEK); //day of the week; monday, tuesday...
   DATASET = concatenate(DATASET, DAYOFYEAR); //day number within the calendar year
   DATASET = concatenate(DATASET, MONTH); //month number
   
   csv_header += ",DAYOFMONTH,DAYOFWEEK,DAYOFYEAR,MONTH";
   
//--- Differencing | Regular differencing lag 1
   
   vector diff_lag_1_open = OHLC.open - lag_1.open;
   vector diff_lag_1_high = OHLC.high - lag_1.high;
   vector diff_lag_1_low = OHLC.low - lag_1.low;
   vector diff_lag_1_close = OHLC.close - lag_1.close;
   
   DATASET = concatenate(DATASET, diff_lag_1_open);
   DATASET = concatenate(DATASET, diff_lag_1_high);
   DATASET = concatenate(DATASET, diff_lag_1_low);
   DATASET = concatenate(DATASET, diff_lag_1_close);
   
   csv_header += ",DIFF_LAG1_OPEN,DIFF_LAG1_HIGH,DIFF_LAG1_LOW,DIFF_LAG1_CLOSE";
   
//--- Target variables | prices of the bar at start_bar-1, i.e. one bar forward
   
   vector TARGET_CLOSE, TARGET_OPEN;
   bool targets_copied = TARGET_CLOSE.CopyRates(Symbol(), timeframe, COPY_RATES_CLOSE, start_bar-1, bars) &&
                         TARGET_OPEN.CopyRates(Symbol(), timeframe, COPY_RATES_OPEN, start_bar-1, bars);
   
   if (!targets_copied || TARGET_CLOSE.Size()!=rows || TARGET_OPEN.Size()!=rows)
    {
       printf("Failed to copy the target prices, Error %d", GetLastError());
       return;
    }
   
   DATASET = concatenate(DATASET, TARGET_CLOSE);
   csv_header += ",TARGET_CLOSE";
   
   DATASET = concatenate(DATASET, TARGET_OPEN);
   csv_header += ",TARGET_OPEN";
   
//---

   if (!WriteCsv("Timeseries OHLC.csv",DATASET,csv_header))
      Print("Failed to save the dataset to Timeseries OHLC.csv");
  }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
//--- Writes matrix_ to the file csv_name as comma-separated text: one header
//--- line followed by one line per matrix row.
//--- csv_name      : name of the file to (re)create
//--- matrix_       : values to save; must have at least one row and one column
//--- header_string : comma-separated column names; when empty, every column
//---                 is named "None"
//--- Returns true once the file has been written and closed; false when the
//--- matrix is empty, the header does not have one name per column, or the
//--- file cannot be opened. All checks on the input run before the file is
//--- touched, so a rejected call neither removes an existing file nor leaves
//--- a handle open.
bool WriteCsv(string csv_name, matrix &matrix_, string header_string="")
  {
   ulong n_rows = matrix_.Rows();
   ulong n_cols = matrix_.Cols();
   
   if (n_rows == 0 || n_cols == 0)
     {
       printf("Nothing to write to %s, the matrix is empty",csv_name);
       return (false);
     }
   
   if (header_string == "" || header_string == NULL)
     for (ulong i=0; i<n_cols; i++)
       header_string += "None"+ (i==n_cols-1?"":","); 

//--- the header must provide exactly one name per matrix column

   string header[];
   
   ushort u_sep;
   u_sep = StringGetCharacter(",",0);
   StringSplit(header_string,u_sep, header);
   
   if (ArraySize(header) != (int)n_cols)
      {
         printf("headers=%d and columns=%d from the matrix vary is size ",ArraySize(header),(int)n_cols);
         return false;
      }

//--- (re)create the file

   FileDelete(csv_name);
   int handle = FileOpen(csv_name,FILE_WRITE|FILE_CSV|FILE_ANSI,",",CP_UTF8);

   if(handle == INVALID_HANDLE)
     {
       printf("Invalid %s handle Error %d ",csv_name,GetLastError());
       return (false);
     }

//--- header line

   string header_str = "";
   for (int i=0; i<ArraySize(header); i++)
      header_str += header[i] + (i+1 == (int)n_cols ? "" : ",");
   
   FileWrite(handle,header_str);
   
//--- data lines; writes are sequential so every line lands after the previous one

   string concstring;
   vector row = {};
   
   for(ulong i=0; i<n_rows && !IsStopped(); i++)
     {
      concstring = "";

      row = matrix_.Row(i);
      for(ulong j=0; j<row.Size() && !IsStopped(); j++)
         concstring += (string)NormalizeDouble(row[j],8) + (j+1 == n_cols ? "" : ",");

      FileWrite(handle,concstring);
     }
        
   FileClose(handle);
   
   return (true);
  }
//+------------------------------------------------------------------+
//|                                                                  |
//| Appends matrix mat2 to matrix mat1. With axis=1 the columns of   |
//| mat2 are placed after the columns of mat1 (row counts must       |
//| match); with axis=0 the rows of mat2 are placed below the rows   |
//| of mat1 (column counts must match).                              |
//|                                                                  |
//+------------------------------------------------------------------+
matrix concatenate(const matrix &mat1, const matrix &mat2, int axis = 1)
 {
     //--- Joins mat2 onto mat1 and returns the result as a new matrix.
     //--- axis=1 : columns of mat2 follow the columns of mat1 (equal row counts required)
     //--- axis=0 : rows of mat2 follow the rows of mat1 (equal column counts required)
     //--- A mat1 with no rows (axis=1) or no columns (axis=0) is treated as empty
     //--- and the result is a copy of mat2.
     //--- On an unsupported axis or a dimension mismatch a message is printed and
     //--- an empty matrix is returned.

     matrix m_out = {};

     if (axis != 0 && axis != 1)
       {
         Print(__FUNCTION__," Axis value Can either be 0 or 1");
         return m_out;
       }

     //an empty mat1 has no elements to copy; reading from it below would go out of range
     if ((axis == 1 && mat1.Rows() == 0) || (axis == 0 && mat1.Cols() == 0))
       {
         m_out = mat2;
         return m_out;
       }

     if ((axis == 0 && mat1.Cols() != mat2.Cols()) || (axis == 1 && mat1.Rows() != mat2.Rows())) 
       {
         Print(__FUNCTION__, "Err | Dimensions mismatch for concatenation");
         return m_out;
       }

     if (axis == 0) {
         //column counts are equal here, rows are stacked
         m_out.Resize(mat1.Rows() + mat2.Rows(), mat1.Cols());

         for (ulong row = 0; row < mat1.Rows(); row++) {
             for (ulong col = 0; col < m_out.Cols(); col++) {
                 m_out[row][col] = mat1[row][col];
             }
         }

         for (ulong row = 0; row < mat2.Rows(); row++) {
             for (ulong col = 0; col < m_out.Cols(); col++) {
                 m_out[row + mat1.Rows()][col] = mat2[row][col];
             }
         }
     } else {
         //row counts are equal here, columns are placed side by side
         m_out.Resize(mat1.Rows(), mat1.Cols() + mat2.Cols());

         for (ulong row = 0; row < m_out.Rows(); row++) {
             for (ulong col = 0; col < mat1.Cols(); col++) {
                 m_out[row][col] = mat1[row][col];
             }

             for (ulong col = 0; col < mat2.Cols(); col++) {
                 m_out[row][col + mat1.Cols()] = mat2[row][col];
             }
         }
     }
   return m_out;
 }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
matrix concatenate(const matrix &mat, const vector &v, int axis=1)
 {
   //--- Returns a copy of mat with vector v attached as one extra row (axis=0)
   //--- or one extra column (axis=1). If v does not fit, or axis is neither
   //--- 0 nor 1, a message is printed and the copy is returned without v.
   
   matrix result = mat;
   
   if (axis != 0 && axis != 1)
     {
       Print(__FUNCTION__," Axis value Can either be 0 or 1");
       return result;
     }
   
   ulong length = v.Size();
   
   if (axis == 0) //v becomes the last row
     {
       //a matrix without columns takes its width from the vector
       if (mat.Cols() == 0)
         result.Resize(mat.Rows(), length);
       
       if (length != result.Cols())
         {
           Print(__FUNCTION__," Dimensions don't match the vector v needs to have the same size as the number of columns in the original matrix");
           return result;
         }
       
       ulong last_row = result.Rows(); //index of the row about to be added
       
       result.Resize(last_row+1, result.Cols());
       result.Row(v, last_row);
       
       return result;
     }
   
   //--- axis == 1, v becomes the last column
   
   //a matrix without rows takes its height from the vector
   if (mat.Rows() == 0)
     result.Resize(length, result.Cols());
   
   if (length != result.Rows())
     {
       Print(__FUNCTION__," Dimensions don't match the vector v needs to have the same size as the number of rows in the original matrix");
       return result;
     }
   
   ulong last_col = result.Cols(); //index of the column about to be added
   
   result.Resize(result.Rows(), last_col+1);
   result.Col(v, last_col);
   
   return result;
 }
//+------------------------------------------------------------------+
//|                                                                  |
//+------------------------------------------------------------------+
